package com.spaceobj.project.component;

import cn.hutool.core.date.DateUtil;
import cn.hutool.core.lang.RegexPool;
import com.spaceobj.project.pojo.Ccgp;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scrapes the announcement listing at {@link #CCGP_LIST}, downloads each announcement's
 * detail page and converts it into a {@link Ccgp} record whose content has phone numbers
 * and a few fixed keywords masked.
 */
@Component
public class CCGPList {

    /** URL of the listing page that is scraped. */
    public static final String CCGP_LIST = "http://www.ccgp.gov.cn/cggg/dfgg/gkzb/";

    /** CSS selector matching one announcement row on the listing page. */
    public static final String PROJECT_LIST_CLASS = ".c_list_bid>li";

    /**
     * CSS selector matching the detail content of an announcement page.
     */
    public static final String PROJECT_DETAIL_LIST_CLASS = ".vF_detail_content";

    /** Keyword replaced by {@code **} in the scraped content. */
    public static final String CHINA = "中国";

    /** Keyword replaced by {@code **} in the scraped content. */
    public static final String GOVERNMENT = "政府";

    /** Label that precedes the collected mobile numbers in the phone-number summary. */
    public static final String MOBILE = "手机号：";

    /** Label that precedes the collected landline numbers in the phone-number summary. */
    public static final String PHONE = "座机号：";

    /** Name of the link-target attribute. */
    public static final String HREF = "href";

    /** Tag name of an anchor element. */
    public static final String A = "a";

    /** Compiled once: {@link Pattern} instances are immutable and safe to share. */
    private static final Pattern PHONE_PATTERN = Pattern.compile(RegexPool.TEL_400_800);

    /** Compiled once: {@link Pattern} instances are immutable and safe to share. */
    private static final Pattern MOBILE_PATTERN = Pattern.compile(RegexPool.MOBILE);

    /** Text that replaces characters 3..6 of every matched number. */
    private static final String MASK = "****";

    /** A listing row must have this many children (name, time, area, person). */
    private static final int ROW_CELL_COUNT = 4;

    /** Fixed price assigned to every scraped record. */
    private static final BigDecimal DEFAULT_PRICE = BigDecimal.valueOf(1000000.00);

    /**
     * Extracts landline and mobile numbers from {@code content} and returns a record holding
     * the masked content together with a summary of the numbers that were found.
     *
     * <p>Every occurrence of a matched number is replaced by its first three characters,
     * {@code ****}, and characters 7..9 (landline) or 7..10 (mobile); anything after that
     * range is dropped from the masked form. In addition {@link #CHINA} and
     * {@link #GOVERNMENT} are replaced by {@code **} and {@code .gov.cn} by {@code .com}.
     * All replacements are literal, so matches containing regex metacharacters (for example
     * a {@code +86} prefix) are handled safely.
     *
     * @param content raw text or HTML to scan; {@code null} is treated as empty
     * @return a new {@link Ccgp} with {@code phoneNumber} set to
     *         {@code PHONE + landlines + MOBILE + mobiles} (each number followed by
     *         {@code ;}) and {@code content} set to the masked text
     */
    public static Ccgp regexContentMobile(String content) {
        String source = content == null ? "" : content;

        StringBuilder phones = new StringBuilder();
        StringBuilder mobiles = new StringBuilder();

        // Landlines first, then mobiles: both scans run on the unmodified source while the
        // replacements accumulate in the masked copy.
        String masked = maskMatches(PHONE_PATTERN, source, source, 10, phones);
        masked = maskMatches(MOBILE_PATTERN, source, masked, 11, mobiles);

        // Literal replacements: the dots in ".gov.cn" must not act as regex wildcards.
        masked = masked.replace(CHINA, "**").replace(GOVERNMENT, "**").replace(".gov.cn", ".com");

        Ccgp ccgp = Ccgp.builder().build();
        ccgp.setPhoneNumber(PHONE + phones + MOBILE + mobiles);
        ccgp.setContent(masked);
        return ccgp;
    }

    /**
     * Finds every match of {@code pattern} in {@code source}, appends it (followed by
     * {@code ;}) to {@code found} and masks all of its literal occurrences in {@code masked}.
     *
     * @param pattern pattern whose matches are at least {@code tailEnd} characters long
     * @param source  text that is scanned for matches
     * @param masked  text in which the matches are replaced
     * @param tailEnd exclusive end index of the visible tail that follows the mask
     * @param found   collector for the unmasked matches
     * @return {@code masked} with every matched number replaced by its masked form
     */
    private static String maskMatches(Pattern pattern, String source, String masked,
                                      int tailEnd, StringBuilder found) {
        String result = masked;
        Matcher matcher = pattern.matcher(source);
        while (matcher.find()) {
            String number = matcher.group();
            found.append(number).append(';');
            String hidden = number.substring(0, 3) + MASK + number.substring(7, tailEnd);
            // String.replace is literal; replaceAll would compile the number as a regex and
            // throw on a leading '+'.
            result = result.replace(number, hidden);
        }
        return result;
    }

    /**
     * Downloads the listing page, follows the link of every row to its detail page and
     * builds one {@link Ccgp} per row whose detail content is not empty.
     *
     * <p>Rows without a resolvable link or with fewer than four child elements are skipped.
     *
     * @return the scraped records in listing order; never {@code null}
     * @throws IOException if the listing page or any detail page cannot be fetched
     */
    public List<Ccgp> getCCGPList() throws IOException {
        List<Ccgp> result = new ArrayList<>();
        Document listing = Jsoup.connect(CCGP_LIST).get();

        for (Element row : listing.select(PROJECT_LIST_CLASS)) {
            Elements cells = row.children();

            // "abs:" makes jsoup resolve the (usually "./"-relative) link against the page URL
            // and yields an empty string when there is no usable link.
            String detailUrl = cells.select(A).attr("abs:" + HREF);
            if (detailUrl.isEmpty() || cells.size() < ROW_CELL_COUNT) {
                continue;
            }

            Document detailPage = Jsoup.connect(detailUrl).get();
            String detailHtml = detailPage.select(PROJECT_DETAIL_LIST_CLASS).toString();

            // Base attributes: masked content, phone summary and source URL.
            Ccgp ccgp = regexContentMobile(detailHtml);
            ccgp.setProjectUrl(detailUrl);
            if (StringUtils.isEmpty(ccgp.getContent())) {
                continue;
            }

            // Row children, in order: name, time, area, person.
            ccgp.setProjectName(cells.get(0).text());
            ccgp.setCreateTime(DateUtil.parse(cells.get(1).text()).toLocalDateTime());
            ccgp.setIpAddress(cells.get(2).text());
            ccgp.setCreateUser(cells.get(3).text());
            ccgp.setPrice(DEFAULT_PRICE);

            result.add(ccgp);
        }
        return result;
    }

    /**
     * Manual entry point: scrapes the listing once and prints the resulting records.
     *
     * @param args ignored
     * @throws Exception if scraping fails
     */
    public static void main(String[] args) throws Exception {

        CCGPList ccgpList = new CCGPList();
        List<Ccgp> ccgpList1 = ccgpList.getCCGPList();

        System.out.println(ccgpList1);
    }

}
